{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import lightgbm as lgb\n",
    "import pandas as pd\n",
    "from sklearn.metrics import mean_squared_error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading data...\n",
      "Starting training...\n",
      "[1]\tvalid_0's l2: 0.24288\tvalid_0's l1: 0.491812\n",
      "Training until validation scores don't improve for 5 rounds.\n",
      "[2]\tvalid_0's l2: 0.239307\tvalid_0's l1: 0.48798\n",
      "[3]\tvalid_0's l2: 0.235559\tvalid_0's l1: 0.483905\n",
      "[4]\tvalid_0's l2: 0.230771\tvalid_0's l1: 0.478533\n",
      "[5]\tvalid_0's l2: 0.226297\tvalid_0's l1: 0.47333\n",
      "[6]\tvalid_0's l2: 0.223692\tvalid_0's l1: 0.470079\n",
      "[7]\tvalid_0's l2: 0.220941\tvalid_0's l1: 0.466768\n",
      "[8]\tvalid_0's l2: 0.217982\tvalid_0's l1: 0.462971\n",
      "[9]\tvalid_0's l2: 0.215351\tvalid_0's l1: 0.459474\n",
      "[10]\tvalid_0's l2: 0.213064\tvalid_0's l1: 0.45619\n",
      "[11]\tvalid_0's l2: 0.211053\tvalid_0's l1: 0.4532\n",
      "[12]\tvalid_0's l2: 0.209336\tvalid_0's l1: 0.450526\n",
      "[13]\tvalid_0's l2: 0.207492\tvalid_0's l1: 0.447621\n",
      "[14]\tvalid_0's l2: 0.206016\tvalid_0's l1: 0.445178\n",
      "[15]\tvalid_0's l2: 0.204677\tvalid_0's l1: 0.442718\n",
      "[16]\tvalid_0's l2: 0.203224\tvalid_0's l1: 0.440293\n",
      "[17]\tvalid_0's l2: 0.201186\tvalid_0's l1: 0.437015\n",
      "[18]\tvalid_0's l2: 0.199626\tvalid_0's l1: 0.434725\n",
      "[19]\tvalid_0's l2: 0.198689\tvalid_0's l1: 0.432653\n",
      "[20]\tvalid_0's l2: 0.197377\tvalid_0's l1: 0.430279\n",
      "Did not meet early stopping. Best iteration is:\n",
      "[20]\tvalid_0's l2: 0.197377\tvalid_0's l1: 0.430279\n",
      "Saving model...\n",
      "Starting predicting...\n",
      "The rmse of prediction is: 0.44427172467679765\n"
     ]
    }
   ],
   "source": [
    "print('Loading data...')\n",
    "# load or create your dataset\n",
    "df_train = pd.read_csv('regression.train', header=None, sep='\\t')\n",
    "df_test = pd.read_csv('regression.test', header=None, sep='\\t')\n",
    "\n",
    "y_train = df_train[0]\n",
    "y_test = df_test[0]\n",
    "X_train = df_train.drop(0, axis=1)\n",
    "X_test = df_test.drop(0, axis=1)\n",
    "\n",
    "# create dataset for lightgbm\n",
    "lgb_train = lgb.Dataset(X_train, y_train)\n",
    "lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)\n",
    "\n",
    "# specify your configurations as a dict\n",
    "params = {\n",
    "    'boosting_type': 'gbdt',\n",
    "    'objective': 'regression',\n",
    "    'metric': {'l2', 'l1'},\n",
    "    'num_leaves': 31,\n",
    "    'learning_rate': 0.05,\n",
    "    'feature_fraction': 0.9,\n",
    "    'bagging_fraction': 0.8,\n",
    "    'bagging_freq': 5,\n",
    "    'verbose': 0\n",
    "}\n",
    "\n",
    "print('Starting training...')\n",
    "# train\n",
    "gbm = lgb.train(params,\n",
    "                lgb_train,\n",
    "                num_boost_round=20,\n",
    "                valid_sets=lgb_eval,\n",
    "                early_stopping_rounds=5)\n",
    "\n",
    "print('Saving model...')\n",
    "# save model to file\n",
    "gbm.save_model('model.txt')\n",
    "\n",
    "print('Starting predicting...')\n",
    "# predict\n",
    "y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)\n",
    "# eval\n",
    "print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
